import os
import re

import requests
from openpyxl import Workbook
wb = Workbook()
ws = wb.active
ws.append(["作者","内容"])
headers = {
    "user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"
}

url = "https://www.qiushibaike.com/text/page/{}/"
head = re.compile(r'<div class="author clearfix">(.*?)</div>',re.S)
name = re.compile(r'<h2>(.*?)</h2>',re.S)
con = re.compile(r'\n<span>.*?(.*?)</span>\n\n',re.S)

for i in range (1,13):
    url1=url.format(i)
    response = requests.get(url1, headers=headers)
    ret = response.text

    list = []
    dic = {}
    host_name = head.findall(ret)
    print(host_name)
    for d in host_name:
        host_name = name.findall(d)[0]
        host_name = host_name.replace('\n', '').replace(' ', '')
        list.append(host_name)
    content = con.findall(ret)
    b = 0
    for c in content:
        c = c.replace('\n', '').replace(' ', '').replace('<br/>', '')
        a = list[b]
        dic[a] = c
        b += 1
    print("第{}页".format(i))

    for k,v in dic.items():
        print(k)
        ws.append([k,v])
        print("作者：{}---内容：{}".format(k,v))
    wb.save("data/糗事.xlsx")